﻿/*
%include /projects/data_commons/lbd_code/lbd_0_ch_ind.sas" /source2;
%merge_ch_ind(data_raw= );

Prepared by Feng Lin.

Note:
Adapted from readlbd3.sas
Use fk_naics, ind_naics, and ind_sic to match industry code
*/

/* Note: 
To abide by Census disclosure guidelines, references to variable names in the raw data have been renamed in
this and other code files. 
*/


/*
---------------------------------------------------------
Load industry crosswalk
---------------------------------------------------------
*/

proc import out=cross datafile="/projects/data_commons/cw/cw_ind_fk_imp.csv";
run;

proc import out=cross12 datafile="/projects/data_commons/cw/cw_ind_fk_imp_fk12.csv";
run;

proc import out=cross_naics datafile="/projects/data_commons/cw/cw_ind_naics_imp.csv";
run;

proc import out=cross_sic datafile="/projects/data_commons/cw/cw_ind_sic_imp.csv";
run;


/*
---------------------------------------------------------
New 3-step method given the fact that refnaics and refsic do not change during a plant's entire life
1. Assign industry code based on fk_naics;
2. For missing, assign industry code based on refnaics in the last year (2002-) and extend to other years if the first 2 digits of fk_naics are the same
3. For missing, assign industry code based on refsic in the last year (-2001) and extend to other years if the first 2 digits of fk_naics are the same
---------------------------------------------------------
*/

/* 
--------------------------------------------------------------------------------
macro for step 2 and 3 

c_ind: naics/sic (the industry code used to merge with ch_ind)
c_ind_by: the numeric code used to differentiate how we merge data
--------------------------------------------------------------------------------
*/

%macro m_merge_ind_s23(d_miss= , c_ind= , c_ind_by= );

/*
----------------------------------------
Subset years

We can only use naics for >= 2002 and sic for <= 2001.
*/

%if &c_ind.=naics %then %do;
data lbd_naics_y;
  set &d_miss.;
  if (year >= 2002);
run;
%end;
%else %if &c_ind.=sic %then %do;
data lbd_sic_y;
  set &d_miss.;
  if (year <= 2001);
run;
%end;

/*
----------------------------------------
Merge with crosswalk for years where refnaics/refsic are originated from
*/

proc sort data=cross_&c_ind.; by &c_ind.;
proc sort data=lbd_&c_ind._y; by &c_ind.;

data lbd_&c_ind._y;
  merge lbd_&c_ind._y(in=x) cross_&c_ind.(in=y);
  by &c_ind.;
  if x = 1;
run;

/*
----------------------------------------
Extend to other years using the assigned industry in the last available year
*/

proc sort data=lbd_&c_ind._y; by lbdid year;

data lbd_&c_ind._y;
  set lbd_&c_ind._y;
  if ch_ind ~= .;
run;

/* Keep ch_ind in the last year */
data lbd_&c_ind._y;
  set lbd_&c_ind._y;
  by lbdid;
  if last.lbdid;
  keep lbdid ch_ind fk_naics_2d;
  rename ch_ind=ch_ind_y;
run;

/* Merge data with ch_ind in the last year */

proc sort data=&d_miss.; by lbdid fk_naics_2d;
proc sort data=lbd_&c_ind._y; by lbdid fk_naics_2d;

data &d_miss.;
  merge &d_miss.(in=x) lbd_&c_ind._y(in=y);
  by lbdid fk_naics_2d;
  if x = 1 & y = 1 then ch_ind_update=1;
run;

/* Assign ch_ind based on ch_ind in the last year */
data &d_miss.;
  set &d_miss.;
  if ch_ind_update = 1 then ch_ind = ch_ind_y;
  if ch_ind_update = 1 then ch_ind_by = &c_ind_by.;
  drop ch_ind_update ch_ind_y;
run;

%mend;

/*
--------------------------------------------------------------------------------
Main Macro m_merge_ind
--------------------------------------------------------------------------------

Default fk_ver is empty
fk_ver=12 uses fk_naics12
*/


%macro m_merge_ind(data_raw= , fk_ver= );

/* 
----------------------------------------
Step 1 merge by fk_naics
*/

%put fk_naics version: &fk_ver.;

proc sort data=cross&fk_ver.; by fk_naics;
proc sort data=&data_raw.; by fk_naics;

data &data_raw.;
  merge &data_raw.(in=x) cross&fk_ver.(in=y);
  by fk_naics;
  if x = 1;
run;

data lbd_fknaics;
  set &data_raw.;
  if ch_ind ~= .;
  ch_ind_by = 1;
run;
/* Note that this dataset has only observation mapped by fk_naics. */

/* Now we shall deal with missing ind code after merging by fk_naics */

/* 
----------------------------------------
Step 2 Using refnaics
*/

data lbdt_miss;
  set &data_raw.;
  if ch_ind = .;
  fk_naics_2d = floor(fk_naics/10000);
  drop ch_ind;
run;
/* Note that this dataset has only observation still without ch_ind. */

%m_merge_ind_s23(d_miss=lbdt_miss, c_ind=naics, c_ind_by=2);

data lbd_naics;
  set lbdt_miss;
  if ch_ind ~= .;
  drop fk_naics_2d;
run;
/* Note that this dataset has only observation mapped by nacis. */

/*
----------------------------------------
Step 3 Using refsic
*/

data lbdt_miss;
  set lbdt_miss;
  if ch_ind = .;
  drop ch_ind;
run;
/* Note that this dataset has only observation still without ch_ind. */

%m_merge_ind_s23(d_miss=lbdt_miss, c_ind=sic, c_ind_by=3);

data lbd_sic;
  set lbdt_miss;
  drop fk_naics_2d;
run;
/* Note that this dataset has only observation mapped by sic or still without ch_ind. */


/*
----------------------------------------
Append the datasets from the previous steps
*/

data &data_raw.;
  set lbd_fknaics lbd_naics lbd_sic;
run;

%mend;

/* End of SAS file */
